{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "e91776f7-d19f-4e27-90be-761c658ceed1",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "     A  B  C    D\n",
      "0  1.0  3  5  7.0\n",
      "1  2.0  4  6  5.0\n",
      "2  NaN  4  7  NaN\n",
      "3  4.0  5  8  NaN\n"
     ]
    }
   ],
   "source": [
    "# 创建包含缺失值的dataframe\n",
    "import pandas as pd\n",
    "import numpy as np\n",
    "na_df = pd.DataFrame({'A':[1,2,np.NaN,4],\n",
    "                     'B':[3,4,4,5],\n",
    "                     'C':[5,6,7,8],\n",
    "                     'D':[7,5,np.NaN,np.NaN]})\n",
    "print(na_df)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "id": "9baddeb5-c3d9-4e43-aa7c-2f9e0ef726fa",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "     A  B  C    D\n",
      "0  1.0  3  5  7.0\n",
      "1  2.0  4  6  5.0\n",
      "2  NaN  4  7  NaN\n",
      "3  4.0  5  8  NaN\n",
      "..................\n",
      "       A      B      C      D\n",
      "0  False  False  False  False\n",
      "1  False  False  False  False\n",
      "2   True  False  False   True\n",
      "3  False  False  False   True\n",
      "...................\n",
      "       A     B     C      D\n",
      "0   True  True  True   True\n",
      "1   True  True  True   True\n",
      "2  False  True  True  False\n",
      "3   True  True  True  False\n",
      "....................\n",
      "       A      B      C      D\n",
      "0  False  False  False  False\n",
      "1  False  False  False  False\n",
      "2   True  False  False   True\n",
      "3  False  False  False   True\n",
      "....................\n",
      "       A     B     C      D\n",
      "0   True  True  True   True\n",
      "1   True  True  True   True\n",
      "2  False  True  True  False\n",
      "3   True  True  True  False\n",
      "A    1\n",
      "B    0\n",
      "C    0\n",
      "D    2\n",
      "dtype: int64\n",
      "...............\n",
      "A    1\n",
      "B    0\n",
      "C    0\n",
      "D    2\n",
      "dtype: int64\n",
      "     A  B  C   D\n",
      "2  NaN  4  7 NaN\n",
      "3  4.0  5  8 NaN\n",
      "......................\n",
      "Unexpected exception formatting exception. Falling back to standard exception\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Traceback (most recent call last):\n",
      "  File \"D:\\anaconda3\\Lib\\site-packages\\IPython\\core\\interactiveshell.py\", line 3577, in run_code\n",
      "    exec(code_obj, self.user_global_ns, self.user_ns)\n",
      "  File \"C:\\Users\\abc18\\AppData\\Local\\Temp\\ipykernel_16352\\439229667.py\", line 36, in <module>\n",
      "    print(highlighted_df.render())\n",
      "          ^^^^^^^^^^^^^^^^^^^^^\n",
      "AttributeError: 'Styler' object has no attribute 'render'. Did you mean: '_render'?\n",
      "\n",
      "During handling of the above exception, another exception occurred:\n",
      "\n",
      "Traceback (most recent call last):\n",
      "  File \"D:\\anaconda3\\Lib\\site-packages\\IPython\\core\\interactiveshell.py\", line 2168, in showtraceback\n",
      "    stb = self.InteractiveTB.structured_traceback(\n",
      "          ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
      "  File \"D:\\anaconda3\\Lib\\site-packages\\IPython\\core\\ultratb.py\", line 1454, in structured_traceback\n",
      "    return FormattedTB.structured_traceback(\n",
      "           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
      "  File \"D:\\anaconda3\\Lib\\site-packages\\IPython\\core\\ultratb.py\", line 1345, in structured_traceback\n",
      "    return VerboseTB.structured_traceback(\n",
      "           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
      "  File \"D:\\anaconda3\\Lib\\site-packages\\IPython\\core\\ultratb.py\", line 1192, in structured_traceback\n",
      "    formatted_exception = self.format_exception_as_a_whole(etype, evalue, etb, number_of_lines_of_context,\n",
      "                          ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
      "  File \"D:\\anaconda3\\Lib\\site-packages\\IPython\\core\\ultratb.py\", line 1107, in format_exception_as_a_whole\n",
      "    frames.append(self.format_record(record))\n",
      "                  ^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
      "  File \"D:\\anaconda3\\Lib\\site-packages\\IPython\\core\\ultratb.py\", line 989, in format_record\n",
      "    frame_info.lines, Colors, self.has_colors, lvals\n",
      "    ^^^^^^^^^^^^^^^^\n",
      "  File \"D:\\anaconda3\\Lib\\site-packages\\IPython\\core\\ultratb.py\", line 801, in lines\n",
      "    return self._sd.lines\n",
      "           ^^^^^^^^^^^^^^\n",
      "  File \"D:\\anaconda3\\Lib\\site-packages\\stack_data\\utils.py\", line 145, in cached_property_wrapper\n",
      "    value = obj.__dict__[self.func.__name__] = self.func(obj)\n",
      "                                               ^^^^^^^^^^^^^^\n",
      "  File \"D:\\anaconda3\\Lib\\site-packages\\stack_data\\core.py\", line 698, in lines\n",
      "    pieces = self.included_pieces\n",
      "             ^^^^^^^^^^^^^^^^^^^^\n",
      "  File \"D:\\anaconda3\\Lib\\site-packages\\stack_data\\utils.py\", line 145, in cached_property_wrapper\n",
      "    value = obj.__dict__[self.func.__name__] = self.func(obj)\n",
      "                                               ^^^^^^^^^^^^^^\n",
      "  File \"D:\\anaconda3\\Lib\\site-packages\\stack_data\\core.py\", line 645, in included_pieces\n",
      "    scope_pieces = self.scope_pieces\n",
      "                   ^^^^^^^^^^^^^^^^^\n",
      "  File \"D:\\anaconda3\\Lib\\site-packages\\stack_data\\utils.py\", line 145, in cached_property_wrapper\n",
      "    value = obj.__dict__[self.func.__name__] = self.func(obj)\n",
      "                                               ^^^^^^^^^^^^^^\n",
      "  File \"D:\\anaconda3\\Lib\\site-packages\\stack_data\\core.py\", line 585, in scope_pieces\n",
      "    for piece in self.source.pieces\n",
      "                 ^^^^^^^^^^^^^^^^^^\n",
      "  File \"D:\\anaconda3\\Lib\\site-packages\\stack_data\\utils.py\", line 145, in cached_property_wrapper\n",
      "    value = obj.__dict__[self.func.__name__] = self.func(obj)\n",
      "                                               ^^^^^^^^^^^^^^\n",
      "  File \"D:\\anaconda3\\Lib\\site-packages\\stack_data\\core.py\", line 90, in pieces\n",
      "    return list(self._clean_pieces())\n",
      "           ^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
      "  File \"D:\\anaconda3\\Lib\\site-packages\\stack_data\\core.py\", line 114, in _clean_pieces\n",
      "    raise AssertionError(\"Pieces mismatches: %s\" % mismatches)\n",
      "AssertionError: Pieces mismatches: [{59, 51}]\n"
     ]
    }
   ],
   "source": [
    "# 查看包含的空缺值\n",
    "\"\"\" \n",
    "缺失值的检测可以采用isnull（）、notnull（）、isna（）和notna（）方法\n",
    "均会返回一个由布尔值组成、与原对象形状相同的新对象\n",
    "isnull和isna用法相同，会在检测到缺失值的位置标记为True\n",
    "notnull和notna用法相同，会在检测到缺失值的位置标记为False\n",
    "\"\"\"\n",
    "# 创建包含缺失值的dataframe\n",
    "import pandas as pd\n",
    "import numpy as np\n",
    "na_df = pd.DataFrame({'A':[1,2,np.NaN,4],\n",
    "                     'B':[3,4,4,5],\n",
    "                     'C':[5,6,7,8],\n",
    "                     'D':[7,5,np.NaN,np.NaN]})\n",
    "print(na_df)\n",
    "print('..................')\n",
    "print(na_df.isna())\n",
    "print('...................')\n",
    "print(na_df.notnull())\n",
    "print('....................')\n",
    "print(na_df.isnull())\n",
    "print('....................')\n",
    "print(na_df.notna())\n",
    "\n",
    "# 计算每列缺失值的总和\n",
    "print(na_df.isnull().sum())\n",
    "print('...............')\n",
    "print(na_df.isna().sum())\n",
    "\n",
    "# 看看缺失值所在的行\n",
    "print(na_df[na_df.isnull().T.any()])\n",
    "print('......................')\n",
    "\n",
    "# 高亮缺失值\n",
    "highlighted_df = na_df.style.highlight_null('skyblue')\n",
    "print(highlighted_df.render())\n",
    "\n",
    "# 删除缺失值：把缺失值出现的行全部删掉\n",
    "print(na_df.dropna())\n",
    "print('.....................')\n",
    "'''删除缺失值：dropna（）用于删除缺失值所在的一行或一列数据，并返回一个删除缺失值后的新对象\n",
    "   dataframe.dropna(axis = 0,how = 'any',thresh = None,subset = None,inplace = False)\n",
    "   axis：表示是否删除包含缺失值的行或列\n",
    "   how：表示删除缺失值的方式\n",
    "   thresh：表示删除指定列的缺失值\n",
    "   inplace：表示是否操作原数据'''\n",
    "\n",
    "# 保留至少有三个非nan的值的行\n",
    "print(na_df.dropna(thresh=3))\n",
    "\n",
    "'''填充缺失值\n",
    "    fillna既可以使用指定的数据填充，也可以用缺失值前面或后面的数据填充\n",
    "    DataFrame.fillna(value = None,method = None,axis = None,\n",
    "    inplace = False,limit = None,doencast = None)\n",
    "    method：表示填充的方式，默认值为None。该参数还支持‘pad’，‘ffill’‘backfill’‘bfill’几种取值。\n",
    "    其中pad和ffill表示将最后一个有效值向后传播，也就是使用缺失值前面的有效值填充缺失值。backfill\n",
    "    和bfill表示将最后一个有效值向前传播，也就是使用缺失值后面的有效值填充缺失值。\n",
    "    limit：表示可以连续填充的最大数量。'''\n",
    "# 缺失值补全，整体填充，将全部缺失值替换成*\n",
    "print(na_df.fillna('*'))\n",
    "# 缺失值补全：平均数填充指定的列\n",
    "# 计算A列的平均数，并保留一位小数\n",
    "col_a = np.around(np.mean(na_df['A']),1)\n",
    "# 计算D列的平均数，并保留一位小数\n",
    "col_d = np.around(np.mean(na_df['D']),1)\n",
    "# 将计算的平均数填充到指定的列\n",
    "na_df.fillna({'A':col_a,'D':col_d})\n",
    "\n",
    "# 缺失值补全：上下均值填充\n",
    "print(na_df.fillna(na_df.interpolate()))\n",
    "\n",
    "\n",
    "'''\n",
    "插补缺失值\n",
    "pandas中提供了插补缺失值的方法interpolate，会根据相应的插值方法进行填充\n",
    "  DataFrame.interpolate(method = 'linear',axis = 0,limit = None,inplace = False,\n",
    "  limit_direction = None,limit_area = None,downcast = None,**kwargs)\n",
    "  method：表示使用的插值方法，该参数支持’linear‘（默认值）、time、index、values、\n",
    "  nearest、barycentric共六种取值，其中linear代表采用线性插值法进行填充；time代表\n",
    "  根据时间长短进行填充；index、values代表采用索引的实际数值进行填充；nearest代表\n",
    "  采用最临近插值法进行填充；barycentric：代表采用重心坐标插值法进行填充；limit_direction\n",
    "  代表按照指定方向对连续的nan进行填充'''\n",
    "# 缺失值补全：线性插值\n",
    "print(na_df.interpolate(method='linear'))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 57,
   "id": "b745f3c5-deb2-4861-a2cb-4c84807c9cc4",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "  name  age  height gender\n",
      "0  刘婷婷   24     162      女\n",
      "1   王淼   23     165      女\n",
      "2   彭艳   29     175      男\n",
      "3   刘华   22     175      男\n",
      "4   刘华   22     175      男\n",
      "5   周三   27     178      男\n",
      "0    False\n",
      "1    False\n",
      "2    False\n",
      "3    False\n",
      "4     True\n",
      "5    False\n",
      "dtype: bool\n",
      "  name  age  height gender\n",
      "4   刘华   22     175      男\n",
      "  name  age  height gender\n",
      "1   王淼   23     165      女\n",
      "3   刘华   22     175      男\n",
      "4   刘华   22     175      男\n",
      "5   周三   27     178      男\n",
      "  name  age  height gender\n",
      "0  刘婷婷   24     162      女\n",
      "1   王淼   23     165      女\n",
      "2   彭艳   29     175      男\n",
      "3   刘华   22     175      男\n",
      "5   周三   27     178      男\n",
      "  name  age  height gender\n",
      "0  刘婷婷   24     162      女\n",
      "1   王淼   23     165      女\n",
      "2   彭艳   29     175      男\n",
      "4   刘华   22     175      男\n",
      "5   周三   27     178      男\n"
     ]
    }
   ],
   "source": [
    "# 创建dataframe对象\n",
    "import pandas as pd\n",
    "import numpy as np\n",
    "df = pd.DataFrame({'name':['刘婷婷','王淼','彭艳','刘华','刘华','周三'],\n",
    "                  'age':[24,23,29,22,22,27],\n",
    "                  'height':[162,165,175,175,175,178],\n",
    "                  'gender':['女','女','男','男','男','男']})\n",
    "print(df)\n",
    "\n",
    "'''重复值的检测\n",
    "   pandas中使用duplicated方法来检测数据中的重复值\n",
    "   DataFrame.duplicated(subest = None,keep = 'first')\n",
    "   subest：表示识别重复项的列索引或列索引序列，默认标识所有的列序列\n",
    "   keep：表示采用哪种方式保留重复项，该参数可以取值为first、last、false，其中first表示代表删除重复\n",
    "   项，仅保留第一次出现的数据项；last代表删除重复项，仅保留最后一次出现的数据项；false表示所有相同的\n",
    "   数据都被标记为重复项\n",
    "   duplicated（）方法检测完数据后会返回一个由布尔值组成的series类对象，该对象中若包含true，说明true\n",
    "   对应的一行数据为重复项'''\n",
    "# 检测df对象的重复值\n",
    "print(df.duplicated())\n",
    "# 查找重复值：将全部重复值所在的行筛选出来\n",
    "print(df[df.duplicated()])\n",
    "# 查找重复值：指定列\n",
    "# 上面时所有列完全重复的情况下，但有时我们只需要根据某列查找重复值\n",
    "print(df[df.duplicated(['gender'])])\n",
    "\n",
    "\n",
    "'''\n",
    "重复值的删除:\n",
    "    使用drop_duplicates()进行删除\n",
    "    DataFrame.frop_duplicates(subet = None,keep = 'first',inplace = false,ignore_index = false)\n",
    "    keep：表示采用哪种方式保留重复项，该参数可以取值为first、last和false其中first表示代表删除重复\n",
    "    项，仅保留第一次出现的数据项；last代表删除重复项，仅保留最后一次出现的数据项；false表示所有相同的\n",
    "    数据都被标记为重复项\n",
    "    inplace：表示是否放弃副本数据，返回新的数据，默认为false\n",
    "    ignore_index：表示是否对删除重复值后的对象的行索引重新排序，默认为flase\n",
    "'''\n",
    "# 删除重复值\n",
    "print(df.drop_duplicates())\n",
    "# 删除全部的重复值，但保留最后一次出现的值\n",
    "print(df.drop_duplicates(keep = 'last'))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "id": "542210d9-68c8-4c80-862a-3261f22c14b3",
   "metadata": {},
   "outputs": [
    {
     "ename": "TypeError",
     "evalue": "Could not convert string '100+95-9990-9485-8980-8475-7970-7465-6960-6455-5950-5445-4940-4435-3930-3425-2920-2415-1910-145-90-4' to numeric",
     "output_type": "error",
     "traceback": [
      "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[1;31mTypeError\u001b[0m                                 Traceback (most recent call last)",
      "Cell \u001b[1;32mIn[17], line 6\u001b[0m\n\u001b[0;32m      3\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mpandas\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m \u001b[38;5;21;01mpd\u001b[39;00m\n\u001b[0;32m      4\u001b[0m data \u001b[38;5;241m=\u001b[39m pd\u001b[38;5;241m.\u001b[39mread_excel(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mage.xlsx\u001b[39m\u001b[38;5;124m'\u001b[39m)\n\u001b[1;32m----> 6\u001b[0m u \u001b[38;5;241m=\u001b[39m data[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mage\u001b[39m\u001b[38;5;124m'\u001b[39m]\u001b[38;5;241m.\u001b[39mmean()\n\u001b[0;32m      7\u001b[0m \u001b[38;5;66;03m# 计算均值\u001b[39;00m\n\u001b[0;32m      8\u001b[0m std \u001b[38;5;241m=\u001b[39m data[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mage\u001b[39m\u001b[38;5;124m'\u001b[39m]\u001b[38;5;241m.\u001b[39mstd()\n",
      "File \u001b[1;32mD:\\anaconda3\\Lib\\site-packages\\pandas\\core\\series.py:6549\u001b[0m, in \u001b[0;36mSeries.mean\u001b[1;34m(self, axis, skipna, numeric_only, **kwargs)\u001b[0m\n\u001b[0;32m   6541\u001b[0m \u001b[38;5;129m@doc\u001b[39m(make_doc(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mmean\u001b[39m\u001b[38;5;124m\"\u001b[39m, ndim\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m1\u001b[39m))\n\u001b[0;32m   6542\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mmean\u001b[39m(\n\u001b[0;32m   6543\u001b[0m     \u001b[38;5;28mself\u001b[39m,\n\u001b[1;32m   (...)\u001b[0m\n\u001b[0;32m   6547\u001b[0m     \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs,\n\u001b[0;32m   6548\u001b[0m ):\n\u001b[1;32m-> 6549\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m NDFrame\u001b[38;5;241m.\u001b[39mmean(\u001b[38;5;28mself\u001b[39m, axis, skipna, numeric_only, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n",
      "File \u001b[1;32mD:\\anaconda3\\Lib\\site-packages\\pandas\\core\\generic.py:12420\u001b[0m, in \u001b[0;36mNDFrame.mean\u001b[1;34m(self, axis, skipna, numeric_only, **kwargs)\u001b[0m\n\u001b[0;32m  12413\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mmean\u001b[39m(\n\u001b[0;32m  12414\u001b[0m     \u001b[38;5;28mself\u001b[39m,\n\u001b[0;32m  12415\u001b[0m     axis: Axis \u001b[38;5;241m|\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;241m=\u001b[39m \u001b[38;5;241m0\u001b[39m,\n\u001b[1;32m   (...)\u001b[0m\n\u001b[0;32m  12418\u001b[0m     \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs,\n\u001b[0;32m  12419\u001b[0m ) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m Series \u001b[38;5;241m|\u001b[39m \u001b[38;5;28mfloat\u001b[39m:\n\u001b[1;32m> 12420\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_stat_function(\n\u001b[0;32m  12421\u001b[0m         \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mmean\u001b[39m\u001b[38;5;124m\"\u001b[39m, nanops\u001b[38;5;241m.\u001b[39mnanmean, axis, skipna, numeric_only, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs\n\u001b[0;32m  12422\u001b[0m     )\n",
      "File \u001b[1;32mD:\\anaconda3\\Lib\\site-packages\\pandas\\core\\generic.py:12377\u001b[0m, in \u001b[0;36mNDFrame._stat_function\u001b[1;34m(self, name, func, axis, skipna, numeric_only, **kwargs)\u001b[0m\n\u001b[0;32m  12373\u001b[0m nv\u001b[38;5;241m.\u001b[39mvalidate_func(name, (), kwargs)\n\u001b[0;32m  12375\u001b[0m validate_bool_kwarg(skipna, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mskipna\u001b[39m\u001b[38;5;124m\"\u001b[39m, none_allowed\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mFalse\u001b[39;00m)\n\u001b[1;32m> 12377\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_reduce(\n\u001b[0;32m  12378\u001b[0m     func, name\u001b[38;5;241m=\u001b[39mname, axis\u001b[38;5;241m=\u001b[39maxis, skipna\u001b[38;5;241m=\u001b[39mskipna, numeric_only\u001b[38;5;241m=\u001b[39mnumeric_only\n\u001b[0;32m  12379\u001b[0m )\n",
      "File \u001b[1;32mD:\\anaconda3\\Lib\\site-packages\\pandas\\core\\series.py:6457\u001b[0m, in \u001b[0;36mSeries._reduce\u001b[1;34m(self, op, name, axis, skipna, numeric_only, filter_type, **kwds)\u001b[0m\n\u001b[0;32m   6452\u001b[0m     \u001b[38;5;66;03m# GH#47500 - change to TypeError to match other methods\u001b[39;00m\n\u001b[0;32m   6453\u001b[0m     \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mTypeError\u001b[39;00m(\n\u001b[0;32m   6454\u001b[0m         \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mSeries.\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mname\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m does not allow \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mkwd_name\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m=\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mnumeric_only\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m   6455\u001b[0m         \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mwith non-numeric dtypes.\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m   6456\u001b[0m     )\n\u001b[1;32m-> 6457\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m op(delegate, skipna\u001b[38;5;241m=\u001b[39mskipna, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwds)\n",
      "File \u001b[1;32mD:\\anaconda3\\Lib\\site-packages\\pandas\\core\\nanops.py:147\u001b[0m, in \u001b[0;36mbottleneck_switch.__call__.<locals>.f\u001b[1;34m(values, axis, skipna, **kwds)\u001b[0m\n\u001b[0;32m    145\u001b[0m         result \u001b[38;5;241m=\u001b[39m alt(values, axis\u001b[38;5;241m=\u001b[39maxis, skipna\u001b[38;5;241m=\u001b[39mskipna, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwds)\n\u001b[0;32m    146\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m--> 147\u001b[0m     result \u001b[38;5;241m=\u001b[39m alt(values, axis\u001b[38;5;241m=\u001b[39maxis, skipna\u001b[38;5;241m=\u001b[39mskipna, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwds)\n\u001b[0;32m    149\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m result\n",
      "File \u001b[1;32mD:\\anaconda3\\Lib\\site-packages\\pandas\\core\\nanops.py:404\u001b[0m, in \u001b[0;36m_datetimelike_compat.<locals>.new_func\u001b[1;34m(values, axis, skipna, mask, **kwargs)\u001b[0m\n\u001b[0;32m    401\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m datetimelike \u001b[38;5;129;01mand\u001b[39;00m mask \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[0;32m    402\u001b[0m     mask \u001b[38;5;241m=\u001b[39m isna(values)\n\u001b[1;32m--> 404\u001b[0m result \u001b[38;5;241m=\u001b[39m func(values, axis\u001b[38;5;241m=\u001b[39maxis, skipna\u001b[38;5;241m=\u001b[39mskipna, mask\u001b[38;5;241m=\u001b[39mmask, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n\u001b[0;32m    406\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m datetimelike:\n\u001b[0;32m    407\u001b[0m     result \u001b[38;5;241m=\u001b[39m _wrap_results(result, orig_values\u001b[38;5;241m.\u001b[39mdtype, fill_value\u001b[38;5;241m=\u001b[39miNaT)\n",
      "File \u001b[1;32mD:\\anaconda3\\Lib\\site-packages\\pandas\\core\\nanops.py:720\u001b[0m, in \u001b[0;36mnanmean\u001b[1;34m(values, axis, skipna, mask)\u001b[0m\n\u001b[0;32m    718\u001b[0m count \u001b[38;5;241m=\u001b[39m _get_counts(values\u001b[38;5;241m.\u001b[39mshape, mask, axis, dtype\u001b[38;5;241m=\u001b[39mdtype_count)\n\u001b[0;32m    719\u001b[0m the_sum \u001b[38;5;241m=\u001b[39m values\u001b[38;5;241m.\u001b[39msum(axis, dtype\u001b[38;5;241m=\u001b[39mdtype_sum)\n\u001b[1;32m--> 720\u001b[0m the_sum \u001b[38;5;241m=\u001b[39m _ensure_numeric(the_sum)\n\u001b[0;32m    722\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m axis \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;28mgetattr\u001b[39m(the_sum, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mndim\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;28;01mFalse\u001b[39;00m):\n\u001b[0;32m    723\u001b[0m     count \u001b[38;5;241m=\u001b[39m cast(np\u001b[38;5;241m.\u001b[39mndarray, count)\n",
      "File \u001b[1;32mD:\\anaconda3\\Lib\\site-packages\\pandas\\core\\nanops.py:1701\u001b[0m, in \u001b[0;36m_ensure_numeric\u001b[1;34m(x)\u001b[0m\n\u001b[0;32m   1698\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m (is_float(x) \u001b[38;5;129;01mor\u001b[39;00m is_integer(x) \u001b[38;5;129;01mor\u001b[39;00m is_complex(x)):\n\u001b[0;32m   1699\u001b[0m     \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(x, \u001b[38;5;28mstr\u001b[39m):\n\u001b[0;32m   1700\u001b[0m         \u001b[38;5;66;03m# GH#44008, GH#36703 avoid casting e.g. strings to numeric\u001b[39;00m\n\u001b[1;32m-> 1701\u001b[0m         \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mTypeError\u001b[39;00m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mCould not convert string \u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mx\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m to numeric\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m   1702\u001b[0m     \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m   1703\u001b[0m         x \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mfloat\u001b[39m(x)\n",
      "\u001b[1;31mTypeError\u001b[0m: Could not convert string '100+95-9990-9485-8980-8475-7970-7465-6960-6455-5950-5445-4940-4435-3930-3425-2920-2415-1910-145-90-4' to numeric"
     ]
    }
   ],
   "source": [
    "# k-s检测：检测样本数据是否符合正态分布\n",
    "import scipy.stats as stats\n",
    "import pandas as pd\n",
    "data = pd.read_excel('age.xlsx')\n",
    "\n",
    "u = data['age'].mean()\n",
    "# 计算均值\n",
    "std = data['age'].std()\n",
    "# 计算标准差\n",
    "stats.kstest(data['age'],'norm',(u,std))\n",
    "# 检测是否符合正态分布"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "id": "0172f6d0-a841-4113-a86f-ce04f40f67e3",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "        name  old  weight\n",
      "id1    user1  221     121\n",
      "id2    user2   21     122\n",
      "id3    user3   20     132\n",
      "id4    user4   16     135\n",
      "id5    user5   13     128\n",
      "id6    user6   22     124\n",
      "id7    user7   18     129\n",
      "id8    user8   19     133\n",
      "id9    user9   20     362\n",
      "id10  user10   20     135\n",
      "id11  user11   19     128\n",
      "id12  user12   23     124\n",
      "id13  user13   22     129\n",
      "id14  user14   20     135\n",
      "id15  user15   19     128\n",
      "id16  user16   23     124\n",
      "id17  user17   22     129\n",
      "id18  user18   21      73\n",
      "KstestResult(statistic=0.5118230511202165, pvalue=6.693292545949361e-05, statistic_location=23, statistic_sign=1)\n",
      "平均值mean_data:31.055555555555557,标准差std_data:47.46822122371758\n",
      "平均值mean_data:19.88235294117647,标准差std_data:2.5466240628814956\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>name</th>\n",
       "      <th>old</th>\n",
       "      <th>weight</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>id1</th>\n",
       "      <td>user1</td>\n",
       "      <td>23</td>\n",
       "      <td>121</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>id2</th>\n",
       "      <td>user2</td>\n",
       "      <td>21</td>\n",
       "      <td>122</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>id3</th>\n",
       "      <td>user3</td>\n",
       "      <td>20</td>\n",
       "      <td>132</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>id4</th>\n",
       "      <td>user4</td>\n",
       "      <td>16</td>\n",
       "      <td>135</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>id5</th>\n",
       "      <td>user5</td>\n",
       "      <td>16</td>\n",
       "      <td>128</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>id6</th>\n",
       "      <td>user6</td>\n",
       "      <td>22</td>\n",
       "      <td>124</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>id7</th>\n",
       "      <td>user7</td>\n",
       "      <td>18</td>\n",
       "      <td>129</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>id8</th>\n",
       "      <td>user8</td>\n",
       "      <td>19</td>\n",
       "      <td>133</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>id9</th>\n",
       "      <td>user9</td>\n",
       "      <td>20</td>\n",
       "      <td>362</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>id10</th>\n",
       "      <td>user10</td>\n",
       "      <td>20</td>\n",
       "      <td>135</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>id11</th>\n",
       "      <td>user11</td>\n",
       "      <td>19</td>\n",
       "      <td>128</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>id12</th>\n",
       "      <td>user12</td>\n",
       "      <td>23</td>\n",
       "      <td>124</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>id13</th>\n",
       "      <td>user13</td>\n",
       "      <td>22</td>\n",
       "      <td>129</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>id14</th>\n",
       "      <td>user14</td>\n",
       "      <td>20</td>\n",
       "      <td>135</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>id15</th>\n",
       "      <td>user15</td>\n",
       "      <td>19</td>\n",
       "      <td>128</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>id16</th>\n",
       "      <td>user16</td>\n",
       "      <td>23</td>\n",
       "      <td>124</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>id17</th>\n",
       "      <td>user17</td>\n",
       "      <td>22</td>\n",
       "      <td>129</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>id18</th>\n",
       "      <td>user18</td>\n",
       "      <td>21</td>\n",
       "      <td>73</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "        name  old  weight\n",
       "id1    user1   23     121\n",
       "id2    user2   21     122\n",
       "id3    user3   20     132\n",
       "id4    user4   16     135\n",
       "id5    user5   16     128\n",
       "id6    user6   22     124\n",
       "id7    user7   18     129\n",
       "id8    user8   19     133\n",
       "id9    user9   20     362\n",
       "id10  user10   20     135\n",
       "id11  user11   19     128\n",
       "id12  user12   23     124\n",
       "id13  user13   22     129\n",
       "id14  user14   20     135\n",
       "id15  user15   19     128\n",
       "id16  user16   23     124\n",
       "id17  user17   22     129\n",
       "id18  user18   21      73"
      ]
     },
     "execution_count": 19,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "data = {'name': ['user1', 'user2', 'user3', 'user4','user5','user6','user7','user8','user9',\n",
    "                 'user10','user11', 'user12', 'user13', 'user14','user15','user16','user17',\n",
    "                 'user18'],\n",
    "        'old': [221,21,20,16,13,22,18,19,20,20,19,23,22,20,19,23,22,21],\n",
    "        'weight':[121,122,132,135,128,124,129,133,362,135,128,124,129,135,128,124,129,73]}\n",
    "columns1=['name', 'old','weight']\n",
    "index1=['id1', 'id2','id3','id4','id5','id6','id7','id8','id9','id10','id11', 'id12',\n",
    "        'id13','id14','id15','id16','id17','id18']\n",
    "df1= pd.DataFrame(data,columns=columns1,index=index1)\n",
    "print(df1)\n",
    "\n",
    "\n",
    "# 判断是否符合正态分布\n",
    "import scipy.stats as stats\n",
    "data = df1\n",
    "# 计算均值\n",
    "u = data['old'].mean()\n",
    "# 计算标准差\n",
    "std = data['old'].std()\n",
    "# 检测是否符合正态分布\n",
    "print(stats.kstest(data['old'],'norm',(u,std)))\n",
    "\n",
    "\n",
    "# 进行异常值检测\n",
    "def three_sigma(ser):\n",
    "    \"\"\"\n",
    "    :param ser：被检测的数据，接收DataFrame的一列数据\n",
    "    :return：异常值及其对应的行索引\n",
    "    \"\"\"\n",
    "    # 计算平均值\n",
    "    mean_data = ser.mean()\n",
    "    # 计算标准差\n",
    "    std_data = ser.std()\n",
    "    print(\"平均值mean_data:{},标准差std_data:{}\".format(mean_data,std_data))\n",
    "\n",
    "    rule = (mean_data-3*std_data>ser) | (mean_data+3*std_data<ser)\n",
    "\n",
    "    # 返回异常值的行索引\n",
    "    index = np.arange(ser.shape[0])[rule]\n",
    "    # 获取异常值\n",
    "    outliers = ser.iloc[index]\n",
    "    return outliers\n",
    "\n",
    "# 对df的old进行异常值检测\n",
    "three_sigma(df1['old'])\n",
    "\n",
    "# 删除指定索引的行后，查看异常值情况\n",
    "df1_drop = df1.drop(['id1'])\n",
    "three_sigma(df1_drop['old'])\n",
    "\n",
    "\n",
    "# 基于替换的方式处理异常值：上面看到了最大值和最小值26.5和14.5\n",
    "topnum1 = 26.5\n",
    "bottomnum1 = 14.5\n",
    "\n",
    "replace_value1 = df1['old'][df1['old'] < 26.5].max()\n",
    "df1.loc[df1['old'] > topnum1,'old'] = replace_value1\n",
    "\n",
    "replace_value2 = df1['old'][df1['old'] > bottomnum1].min()\n",
    "df1.loc[df1['old'] < bottomnum1,'old'] = replace_value2\n",
    "\n",
    "df1"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "92cc580a-ea6f-4a42-8a7a-2055a0ae14a5",
   "metadata": {},
   "source": [
    "数据清理的异常值处理箱型图没看\n",
    "\n",
    "箱形图对检测数据没有任何要求，即使不符合正态分布的数据集也是能被检测的。\n",
    "\n",
    "箱形图是一种用于显示一组数据分散情况的统计图，它通常由上边缘、上四分位数、中位数、下四分位数、下边缘和异常值组成。箱形图能直观地反映出一组数据的分散情况，一旦图中出现离群点（远离大多数值的点），就认为该离群点可能为异常值。\n",
    "\n",
    "为了能够直观地从箱形图中查看异常值，pandas中提供了两个绘制箱形图的函数：plot()和boxplot()，其中plot()函数用于根据Series和DataFrame类对象绘制箱形图，该箱形图中默认不显示网格线；boxplot()函数用于根据DataFrame类对象绘制箱形图，该箱形图中默认会显示网格线。\n",
    "\n",
    "```python\n",
    "DataFrame.boxplot(column=None, by=None, ax=None, fontsize=None,\n",
    "                  rot=0, grid=True, figsize=None, layout=None,\n",
    "                  return_type=None, backend=None, **kwargs)\n",
    "```\n",
    "\n",
    "- rot：表示箱形图坐标轴旋转角度。\n",
    "- grid：表示是否显示网格线。\n",
    "- figsize：表示箱形图窗口尺寸大小。\n",
    "- return_type：表示返回的对象类型，该参数取值可为'axes'、'dict'和'both'。\n",
    "\n",
    "如果需要从箱形图中获取异常值及其对应的索引，那么可以根据箱形图中异常值的范围计算，具体计算方式为：首先对数据集进行排序，然后根据排序后的数据分别计算Q1、Q3和IQR的值，最后根据异常值的范围（小于Q1 – 1.5IQR或大于Q3 + 1.5IQR）找出异常值。\n",
    "\n",
    "在计算数据集的四分位数时，除了要先对数据集排序外，还要根据其中数据的总数量选择不同的计算方式：当数据的总数量为偶数时，数据集被中位数划分为个数相等（每组有n/2个）的两组数，其中第一组数的中位数为Q1，第二组数的中位数为Q3；当数据的总数量为奇数时，中位数会将数据集划分为个数相等（每组有 (n-1)/2 个）的两组数，其中第一组数的中位数为Q1，第二组数的中位数为Q3。"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "id": "3cc8d7cc-862e-49d6-af44-6f99e8d9d7b9",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAiYAAAGdCAYAAAAmK7htAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguNCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8fJSN1AAAACXBIWXMAAA9hAAAPYQGoP6dpAAAdHElEQVR4nO3df4zV9Z3v8dcwDFMYZqbYihWBygq9NBdM08ItXYNKhdmIpY4Gvbut2dhVs1tjYoPGBLe7W3dRsrZs2rVttukfNZu77XXFX1PAG4aV6qi1KGoFd7X0LhiuYyt1IoMw0uMw9w8vkztF6wBHzmeGxyMhcb6/fJ8/vnOe+X6/c07dwMDAQAAACjCm1gMAABwmTACAYggTAKAYwgQAKIYwAQCKIUwAgGIIEwCgGMIEACjG2FoPcLQOHTqU7u7uNDc3p66urtbjAADDMDAwkH379mXKlCkZM+bdr4uMuDDp7u7OtGnTaj0GAHAMdu/enalTp77r+hEXJs3NzUnefmEtLS01ngaopkqlko0bN6atrS0NDQ21Hgeoot7e3kybNm3wffzdjLgwOXz7pqWlRZjAKFOpVDJhwoS0tLQIExil3usxDA+/AgDFECYAQDGECQBQDGECABRDmAAAxRAmAEAxhAkAUAxhAgAUQ5gARejv78/DDz+cRx55JA8//HD6+/trPRJQA8IEqLl77703M2fOzJIlS/IP//APWbJkSWbOnJl777231qMBJ5gwAWrq3nvvzfLlyzN37tx0dXXlRz/6Ubq6ujJ37twsX75cnMBJpm5gYGCg1kMcjd7e3rS2tmbv3r2+KwdGuP7+/sycOTNz587N/fffn/7+/mzYsCFLly5NfX192tvbs3379uzYsSP19fW1Hhc4DsN9/3bFBKiZrq6u7Nq1KzfffHPGjBn662jMmDFZuXJldu7cma6urhpNCJxowgSomVdeeSVJMmfOnHdcf3j54e2A0U+YADVz+umnJ0m2b9/+jusPLz+8HTD6CROgZhYuXJgzzzwzt912Ww4dOjRk3aFDh7J69erMmDEjCxcurNGEwIkmTICaqa+vz5o1a7Ju3bq0t7fniSeeSF9fX5544om0t7dn3bp1+cY3vuHBVziJjK31AMDJ7dJLL83atWtzww035Nxzzx1cPmPGjKxduzaXXnppDacDTjR/LgwUob+/P5s3b86DDz6YCy+8MIsWLXKlBEaR4b5/u2ICFKG+vj7nnXde9u/fn/POO0+UwEnKMyYAQDGECQBQDGECABRDmAAAxRAmAEAxhAkAUAxhAgAUQ5gAAMUQJgBAMYQJAFAMYQIAFEOYAADFECYAQDGECQBQDGECABRDmAAAxRAmAEAxhAkAUAxhAgAUQ5gAAMUQJgBAMYQJAFCMowqT1157LX/yJ3+SlpaWjBs3Lueff3527do1uH7Lli1ZsGBBmpub09bWlt27dw/Z/4033shVV12VU045JTNnzszdd99dlRcBAIwORxUml19+eR555JH89V//dW6//fZs27Yty5cvT5Ls2rUrbW1taWpqytq1a3PGGWfkoosuSqVSGdz/iiuuyL333ptvfvOb+epXv5orr7wyP/3pT6v7igCAEWvscDfctGlTfvazn2X79u0588wzkyTNzc25+uqrs3Pnznz961/PuHHj0tHRkaampixevDizZs3K/fffn8suuyxPPvlkHnjggdx11125/PLLkyQ7d+7MqlWrsn79+vflxQEAI8uwr5jMnz8/W7ZsGYySJPnQhz6UJPntb3+bTZs25eKLL05TU1OSpL6+PsuWLcumTZuSJJ2dnZkwYUIuueSSwf3b29uzefPm9Pf3V+O1AAAj3LCvmLS2tqa1tXXIsgcffDCnnnpqZs2ale7u7px99tlD1k+fPn3wakh3d3dmz56dhoaGIev7+vry8ssvZ/r06e/4/z148GAOHjw4+HNvb2+SpFKpDLlNBIx8h89p5zaMPsM9
r4cdJr/rP//zP3PnnXfma1/7WsaMGZO+vr5MmjRpyDbjx4/Pnj17kuRd1yfJnj173jVMVq9enVtuueWI5Rs3bsyECROOdXygYJ2dnbUeAaiyAwcODGu7YwqTQ4cO5Utf+lKmTp2a66+/PknS2NiY+vr6IduNGzcufX19v3d9ksFt3snKlSuzYsWKwZ97e3szbdq0tLW1paWl5VjGBwpVqVTS2dmZJUuWDLm6Cox8h+94vJdjCpO///u/z2OPPZZHHnlk8KrF5MmT093dPWS7np6ewWdOJk+enK6uriPWJxnc5p00NjamsbHxiOUNDQ1+ccEo5fyG0We45/RRf8DaQw89lL/6q7/Krbfemj/8wz8cXL5gwYI89thjQ7bdunVrpkyZMrj+xRdfzGuvvTZkfZLBbQCAk9tRhcm///u/Z/ny5fnc5z6Xm266aci65cuXZ/369dm2bVuStz/XpKOjI4sXL06SXHDBBZk4cWLWrFmTJBkYGMgdd9yROXPm5LTTTqvGawEARrhh38qpVCpZvnx56urqcv311w9e7UiSGTNmpL29PfPmzcv555+fZcuWZePGjTn11FNzzTXXJHn7Es6qVaty3XXX5fnnn09PT08effTR3HfffdV/VQDAiDTsKybbt2/Pf/zHf6Snpyef/exnM3/+/MF/P/7xjzN27Nh0dnbm6quvznPPPZdFixbl8ccfH/KA6rXXXpt77rknr7/+eurq6tLR0ZH29vb343UBACNQ3cDAwECthzgavb29aW1tzd69e/1VDowylUolGzZsyNKlSz38CqPMcN+/fbswAFAMYQIAFEOYAADFECYAQDGECQBQDGECABRDmAAAxRAmAEAxhAkAUAxhAgAUQ5gAAMUQJgBAMYQJAFAMYQIAFEOYAADFECYAQDGECQBQDGECABRDmAAAxRAmAEAxhAkAUAxhAgAUQ5gAAMUQJgBAMYQJAFAMYQIAFEOYAADFECYAQDGECQBQDGECABRDmAAAxRAmAEAxhAkAUAxhAgAUQ5gAAMUQJgBAMYQJAFAMYQIAFEOYAADFECYAQDGECQBQDGECABRDmAAAxRAmAEAxhAkAUAxhAgAUQ5gAAMUQJgBAMYQJAFAMYQIAFEOYAADFECYAQDGECQBQDGECABRDmAAAxRAmAEAxhAkAUAxhAgAUQ5gAAMUQJgBAMYQJAFAMYQIAFEOYAADFECYAQDGECQBQDGECABRDmAAAxRAmAEAxjjlMOjs7c9ZZZx2xfMqUKamrqxvy75/+6Z8G17/xxhu56qqrcsopp2TmzJm5++67j3UEAGCUGXssO73wwgv5whe+kKampiHLf/3rX+eVV17JunXrctpppw0u/+hHPzr431dccUUefvjhfOtb38qhQ4dy5ZVXZurUqfnMZz5zjC8BABgtjjpMtmzZkgsvvDBnnXVWXn311SHrnnnmmTQ3N2fp0qWpq6s7Yt8nn3wyDzzwQO66665cfvnlSZKdO3dm1apVWb9+/TG+BABgtDjqWzmPPPJI1qxZk2uvvfaIdU8//XTmzZv3jlGSvH37Z8KECbnkkksGl7W3t2fz5s3p7+8/2lEAgFHmqK+YrFixImPGjMmdd955xLqtW7fmpZdeyplnnplf//rXmTt3bv7u7/4uf/RHf5Qk6e7uzuzZs9PQ0DC4z/Tp09PX15eXX34506dPP+KYBw8ezMGDBwd/7u3tTZJUKpVUKpWjHR8o2OFz2rkNo89wz+ujDpMxY979IsuWLVsyefLk3HTTTfngBz+Yb3/721m2bFmee+65zJ49O319fZk0adKQfcaPH58k2bNnzzuGyerVq3PLLbccsXzjxo2ZMGHC0Y4PjACdnZ21HgGosgMHDgxru2N6+PXdPPDAA5k1a1aam5uTJBdccEFmzZqV73//+1mzZk0aGxtTX18/ZJ9x48YlSfr6+t7xmCtXrsyKFSsGf+7t7c20adPS1taWlpaWao4P1FilUklnZ2eWLFky5Moq
MPIdvuPxXqoaJp/85CeHHnzs2Hz605/Os88+mySZPHlyurq6hmzT09OTJEf8hc9hjY2NaWxsPGJ5Q0ODX1wwSjm/YfQZ7jldtQ9Y279/fx566KEjlvf09OTNN99MkixYsCAvvvhiXnvttcH1W7duTfL2558AACe3qoXJzp07s3jx4uzYsWNw2a5du9LV1ZUFCxYkefvWzsSJE7NmzZokycDAQO64447MmTNnyOeeAAAnp6rdypkzZ07OPffcXHzxxbn++uvz29/+NmvWrElTU1O+8pWvJHn7Ms6qVaty3XXX5fnnn09PT08effTR3HfffdUaAwAYwar6XTn/+q//mjlz5uTGG2/M1772tZxzzjl57rnnMm3atMFtrr322txzzz15/fXXU1dXl46OjrS3t1dzDABghKobGBgYqPUQR6O3tzetra3Zu3evv8qBUaZSqWTDhg1ZunSph19hlBnu+7dvFwYAiiFMAIBiCBMAoBjCBAAohjABAIohTACAYggTAKAYwgQAKIYwAQCKIUwAgGIIEwCgGMIEACiGMAEAiiFMAIBiCBMAoBjCBAAohjABAIohTACAYggTAKAYwgQAKIYwAQCKIUwAgGIIEwCgGMIEACiGMAEAiiFMAIBiCBMAoBjCBAAohjABAIohTACAYggTAKAYwgQAKIYwAQCKIUwAgGIIEwCgGMIEACiGMAEAiiFMAIBiCBMAoBjCBAAohjABAIohTACAYggTAKAYwgQAKIYwAQCKIUwAgGIIEwCgGMIEACiGMAEAiiFMAIBiCBMAoBjCBAAohjABAIohTACAYggTAKAYwgQAKIYwAQCKIUwAgGIIEwCgGMIEACiGMAEAiiFMAIBiCBMAoBjCBAAohjABAIohTACAYggTAKAYxxwmnZ2dOeuss45YvmXLlixYsCDNzc1pa2vL7t27h6x/4403ctVVV+WUU07JzJkzc/fddx/rCADAKHNMYfLCCy/kC1/4Qvr7+4cs37VrV9ra2tLU1JS1a9fmjDPOyEUXXZRKpTK4zRVXXJF777033/zmN/PVr341V155ZX76058e36sAAEaFsUe7w5YtW3LhhRfmrLPOyquvvjpk3e23355x48alo6MjTU1NWbx4cWbNmpX7778/l112WZ588sk88MADueuuu3L55ZcnSXbu3JlVq1Zl/fr11XlFAMCIddRXTB555JGsWbMm11577RHrNm3alIsvvjhNTU1Jkvr6+ixbtiybNm1K8vbtnwkTJuSSSy4Z3Ke9vT2bN28+4uoLAHDyOeowWbFiRa688sp3XNfd3Z2zzz57yLLp06dnx44dg+tnz56dhoaGIev7+vry8ssvH+0oAMAoc9S3csaMefeW6evry6RJk4YsGz9+fPbs2fN71yfJnj17Mn369COOefDgwRw8eHDw597e3iRJpVIZ8uwKMPIdPqed2zD6DPe8Puow+X0aGxtTX18/ZNm4cePS19f3e9cnGdzmd61evTq33HLLEcs3btyYCRMmVGNsoDCdnZ21HgGosgMHDgxru6qGyeTJk9Pd3T1kWU9Pz+AzJ5MnT05XV9cR65MMbvO7Vq5cmRUrVgz+3Nvbm2nTpqWtrS0tLS3VHB+osUqlks7OzixZsmTILV9g5Dt8x+O9VDVMFixYkMceeyw33HDD4LKtW7dmypQpg+tvu+22vPbaa/nQhz40uD7J4Da/q7GxMY2NjUcsb2ho8IsLRinnN4w+wz2nq/rJr8uXL8/69euzbdu2JG9/rklHR0cWL16cJLngggsyceLErFmzJkkyMDCQO+64I3PmzMlpp51WzVEAgBGoqldM2tvbM2/evJx//vlZtmxZNm7cmFNPPTXXXHNNkrdradWqVbnuuuvy/PPPp6enJ48++mjuu+++ao4BAIxQVb1iMnbs2HR2dubqq6/Oc889l0WLFuXxxx8f8izItddem3vuuSevv/566urq0tHRkfb29mqOAQCMUHUDAwMDtR7iaPT29qa1
tTV79+718CuMMpVKJRs2bMjSpUs9YwKjzHDfv327MABQDGECABRDmAAAxRAmAEAxhAkAUAxhAgAUQ5gAAMUQJgBAMYQJAFAMYQIAFEOYAADFECYAQDGECQBQDGECABRDmAAAxRAmAEAxhAkAUAxhAgAUQ5gAAMUQJgBAMYQJAFAMYQIAFEOYAADFECYAQDGECQBQDGECABRDmAAAxRAmAEAxhAkAUAxhAgAUQ5gAAMUQJgBAMYQJAFAMYQIAFEOYAADFECYAQDGECQBQDGECABRDmAAAxRAmAEAxhAkAUAxhAgAUQ5gAAMUQJgBAMYQJAFAMYQIAFEOYAADFECYAQDGECQBQDGECABRDmAAAxRAmAEAxhAkAUAxhAgAUQ5gAAMUQJgBAMYQJAFAMYQIAFEOYAADFECYAQDGECQBQDGECABRDmAAAxRAmAEAxhAkAUAxhAgAUQ5gAAMUQJgBAMaoeJhs3bkxdXd0R/958880kyZYtW7JgwYI0Nzenra0tu3fvrvYIAMAIVfUweeaZZzJ//vw8+eSTQ/41NjZm165daWtrS1NTU9auXZszzjgjF110USqVSrXHAABGoLHVPuAzzzyTT3/605k3b94R626//faMGzcuHR0daWpqyuLFizNr1qzcf//9ueyyy6o9CgAwwlT9isnTTz+d+fPnv+O6TZs25eKLL05TU1OSpL6+PsuWLcumTZuqPQYAMAJVNUz27t2bX/7yl/n2t7+d1tbWTJo0KVdccUV+9atfJUm6u7tz9tlnD9ln+vTp2bFjRzXHAABGqKreynnqqacyMDCQT33qU7n11lvT3d2dm2++OZdddlm6urrS19eXSZMmDdln/Pjx2bNnz7se8+DBgzl48ODgz729vUmSSqXi2RQYZQ6f085tGH2Ge15XNUzmzZuXZ555Jp/4xCcGl02dOjWLFy/Otm3b0tjYmPr6+iH7jBs3Ln19fe96zNWrV+eWW245YvnGjRszYcKEqs0OlKOzs7PWIwBVduDAgWFtV9UwaW1tHRIlSXLOOeckSZ599tlMnjw53d3dQ9b39PQMPnPyTlauXJkVK1YM/tzb25tp06alra0tLS0t1RseqLlKpZLOzs4sWbIkDQ0NtR4HqKLDdzzeS1XDZOfOndm/f3/mzJkzuKynpydJ8uabb2bBggV57LHHcsMNNwyu37p1a6ZMmfKux2xsbExjY+MRyxsaGvziglHK+Q2jz3DP6ao+/Pqd73wnX/7yl4cs++d//uckyYIFC7J8+fKsX78+27ZtS5Ls2rUrHR0dWbx4cTXHAABGqKpeMbnqqqvyne98J3/6p3+aJUuW5Jlnnsk//uM/5rLLLsvcuXPz8Y9/PPPmzcv555+fZcuWZePGjTn11FNzzTXXVHMMAGCEquoVk49//ON54IEH8vOf/zzXXHNNfvzjH+frX/96fvSjHyVJxo4dm87Ozlx99dV57rnnsmjRojz++OOeFQEAkiR1AwMDA7Ue4mj09vamtbU1e/fuFTQwylQqlWzYsCFLly71jAmMMsN9//btwgBAMYQJAFAMYQIAFEOYAADFECYAQDGECQBQDGECABRDmAAAxRAmAEAxhAkAUAxhAgAUQ5gAAMUQJgBAMYQJAFAMYQIAFEOYAADFECYAQDGECQBQDGECABRDmAAAxRAmAEAxhAkAUAxhAgAUQ5gAAMUQJgBAMYQJAFAMYQIAFEOYAADFECYAQDGECQBQDGECABRDmAAAxRAmAEAxhAkAUAxhAgAUY2ytBwBGtgMHDuSFF16oyrHe6DuYx7f970z68FOZOL7xuI83e/bsTJgwoQqTASeKMAGOywsvvJBPfepTVT3m7VU6ztatW/PJT36ySkcDTgRhAiexnb/Zn/0H3zquY/S3nJ67HvxJVeZ56Tdv5B82/TIrFs/MRz888biP199yera/vPe4j9PUODYzPtx03McB3pswgZPUzt/sz6Jv/KTWYxyh8SMz853tSfLG8R/sJ08f
/zH+n803ni9O4AQQJnCSOnyl5Jv//ROZOfn4r05Uw/6+g1n3k5/mc+d/Jk1VeMakGn756hv5yl3PHveVJWB4hAmcxOrG9qb+Ay9nzAfKCJPxY9/KlEndGd/8q4wZW8avp/oPvJG6sb21HgNOGmWc+UBNNHzwZ7l5y221HuMI3/1f3631CEM0fPCCJEtrPQacFIQJnKT6Kv2pvP7pfHn+54u5ldN38LfpempbFs6bm/GN42o9TpJkd8+BfH3HK7UeA04awgROUv/71Tcy8FZLvvVgX5K+Wo/z/5meH//y+P+Sprpa0tTo1yWcCM40OEm1/dePJEnOmjwx4xvqazzN2158ZW9uWLsta5bPzX85vbXW4wzy58Jw4ggTOEmd0jQuf/zfptd6jCHeeuvtv3w569SmzDmjnDABThzflQMAFMMVE+C4VOu7cvr7+9OxqSuvP/5s7ml9MZXFC1Nff3y3mHxXDow8dQMDAwO1HuJo9Pb2prW1NXv37k1LS0utx4GT3tNPP13178qpFt+VA+UY7vu3KybAcZk9e3a2bt16zPs/9NBDuemmm7Jw4cL88ReuyC/+z2/ysakfzv/84f9IV1dXbr/99nz2s5895tmAkcUVE6Bm+vv7M3PmzMydOzf3339/+vv7s2HDhixdujT19fVpb2/P9u3bs2PHjuO+rQPU1nDfvz38CtRMV1dXdu3alZtvvjljxgz9dTRmzJisXLkyO3fuTFdXV40mBE40YQLUzCuvvP2JqnPmzHnH9YeXH94OGP2ECVAzp59+epJk+/bt77j+8PLD2wGjnzABambhwoU588wzc9ttt+XQoUND1h06dCirV6/OjBkzsnDhwhpNCJxowgSomfr6+qxZsybr1q1Le3t7nnjiifT19eWJJ55Ie3t71q1bl2984xsefIWTiD8XBmrq0ksvzdq1a3PDDTfk3HPPHVw+Y8aMrF27NpdeemkNpwNONH8uDBShv78/mzdvzoMPPpgLL7wwixYtcqUERhEfsAaMKPX19TnvvPOyf//+nHfeeaIETlKeMQEAiiFMAIBiCBMAoBjCBAAohjABAIohTACAYggTAKAYwgQAKIYwAQCKMeI++fXwJ+j39vbWeBKg2iqVSg4cOJDe3t40NDTUehygig6/b7/XN+GMuDDZt29fkmTatGk1ngQAOFr79u1La2vru64fcV/id+jQoXR3d6e5uTl1dXW1Hgeoot7e3kybNi27d+/2JZ0wygwMDGTfvn2ZMmVKxox59ydJRlyYAKOXbw8HPPwKABRDmAAAxRAmQDEaGxvzN3/zN2lsbKz1KECNeMYEACiGKyYAQDGECQBQDGECABRDmABF2LVrV+rq6rJr16733PbOO+/MmWee+b7PBJx4wgQAKIYwAQCKIUwAgGIIE+B999Zbb+Xmm2/ORz7ykTQ1NeWSSy7J7t27h73/li1bMm/evHzgAx/IOeeck5deeul9nBaoJWECvO/+/M//PN/97nfzt3/7t7n77rvz0ksv5bzzzktvb+977rtv37587nOfS3Nzczo6OtLW1pZbb731BEwN1MLYWg8AjG47d+7MD37wg3zve9/LNddckyQ5++yzM3PmzPzgBz/I9ddf/3v3/5d/+Zf09PTkhz/8YU4//fS0tbXl5z//eZ5++ukTMT5wgrliAryvnnrqqQwMDGTx4sWDy6ZOnZqPfexjefLJJ99z/1/84heZPn16Tj/99MFl55577vsyK1B7wgR4Xx3+Oq66urohy8eMGZPhfFXXoUOHUl9fP2TZ7/4MjB7CBHhfzZs3L3V1dfm3f/u3wWUvv/xyXnjhhcyfP/899585c2Zeeuml7NmzZ3DZY4899r7MCtSeMAHeV3/wB3+QL33pS7nxxhvz/e9/Pxs2bMjFF1+cKVOm5M/+7M/ec/8vfvGLmThxYr74xS9m06ZNue2223LPPfecgMmBWhAmwPvue9/7Xv7iL/4if/mXf5nly5dn6tSpefjhh9PS0vKe
+06aNCmdnZ3Zt29fPv/5z+e+++7LjTfeeAKmBmqhbmA4N3kBAE4AV0wAgGIIEwCgGMIEACiGMAEAiiFMAIBiCBMAoBjCBAAohjABAIohTACAYggTAKAYwgQAKMb/BY+EXPH2SlfuAAAAAElFTkSuQmCC",
      "text/plain": [
       "<Figure size 640x480 with 1 Axes>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "下限为14.5,上限为26.5\n",
      "下限为14.5,上限为26.5\n",
      "下限为14.5,上限为26.5\n",
      "Series([], Name: old, dtype: int64)\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "<Axes: >"
      ]
     },
     "execution_count": 19,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAh4AAAGdCAYAAABdD3qhAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguNCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8fJSN1AAAACXBIWXMAAA9hAAAPYQGoP6dpAAAgaUlEQVR4nO3df3BU9b3/8dcmu1lMyC7e8FuCCQH5Fi9WZozNFyi/CqlYrDIuaUG00InTSn+kRqRDIZG0YgxtOrZaf9QiuX6rba0KJQqYpKhRFCJYMREC5UcQ5FoJaDYQutkk+/2DsvduE2B3s/lsNj4fM0xmzzl79j2ZObtPzm7OWnw+n08AAAAGxEV7AAAA8PlBeAAAAGMIDwAAYAzhAQAAjCE8AACAMYQHAAAwhvAAAADGEB4AAMAYa7QH+N86Ojp0/PhxJScny2KxRHscAAAQBJ/Pp+bmZg0fPlxxcRc/p9GrwuP48eNKTU2N9hgAACAMR48e1YgRIy66Ta8Kj+TkZEnnBnc4HFGeBkAkeb1eVVRUKDs7WzabLdrjAIggt9ut1NRU/+v4xfSq8Dj/9orD4SA8gD7G6/UqMTFRDoeD8AD6qGA+JsGHSwEAgDGEBwAAMIbwAAAAxhAeAADAGMIDAAAYQ3gAAABjCA8AAGAM4QEAAIwJKTxOnjyp+fPny+FwKCEhQdOmTVNDQ4N/fXNzs1avXq2cnBx9//vf1/vvvx/peQEAQAwLKTxycnJUXV2twsJCrVmzRrW1tXK5XJIkj8ejKVOmaMOGDRo3bpw++OADXX/99cQHAADwC/qS6VVVVdqxY4fq6uqUlpYm6dx3q+Tm5urw4cN64YUX9M9//lPvvfee7Ha72tvbdfXVV+upp57SQw891EPjAwCAWBJ0eGRmZqqmpsYfHZKUkpIiSWptbVVmZqYmT54su90uSYqPj9eQIUPk8XgiOzEAAIhZQYeH0+mU0+kMWLZ582YNGjRIY8aM0dixYwPWvfHGG3rrrbdUUFBwwX16PJ6AMHG73ZLOfZmU1+sNdjQAPailpUX79u3r9n5On/XordqDSh6wXf0vs0dgMmns2LFKTEyMyL4AhC+U1+ywv5320KFDKisr06pVqxQX9z8fFdm/f7+WLFmi1157TcXFxZo5c+YF91FcXKyioqJOyysqKngyAXqJgwcP6p577onY/tZEbE9SaWmpMjIyIrhHAOFoaWkJeluLz+fzhfoAHR0dmj59uo4dO6ba2tqASDhx4oSefPJJrV27VjabTa+++qqGDRvW5X66OuORmpqqxsZGORyOUMcC0AMidcZj/3836d71e/TzueN01TDnpe8QBM54AL2D2+3WwIED1dTUdMnX77DCo7i4WAUFBaqurtbEiRO73Ob06dMaP368Zs6cqSeffDLowZ1OZ1CDA4gt7x05qVse264Nd2Xp2itToj0OgAgK5fU75AuIbd26VQUFBVq9enVAdHzyySdqamry3+7fv7+mT5+uvXv3hvoQAACgjwopPPbs2SOXy6U5c+Zo2bJlAevmzZun5cuXByzbt29fwF/BAACAz7egP1zq9XrlcrlksViUl5enXbt2+delp6crPz9fc+fO1YABA5Sdna3y8nLt3LlTpaWlPTI4AACIPUGHR11dnf9tkxkzZgSsW7dunRYtWqSnn35aDzzwgH71q19p/Pjx2rJli7KysiI7MQAAiFlBh8eECRN0qc+hLly4UAsXLuz2UAAAoG/i22kBAIAxhAcAADCG8AAAAMYQHgAAwBjCAwAAGEN4AAAAYwgPAABgDOEBAACMITwAAIAxhAcAADCG8AAAAMYQHgAAwBjCAwAAGEN4AAAAYwgPAABgDOEBAACMITwAAIAxhAcAADCG8AAAAMYQHgAAwBjCAwAAGEN4AAAAYwgPAABgDOEBAACMITwAAIAxhAcAADCG8AAAAMYQHgAAwBjCAwAAGEN4AAAAYwgPAABgDOEBAACM
ITwAAIAxhAcAADCG8AAAAMaEFB4nT57U/Pnz5XA4lJCQoGnTpqmhocG//je/+Y2uvPJKWa1WjRgxQs8++2yk5wUAADEspPDIyclRdXW1CgsLtWbNGtXW1srlckmSnn76af3whz/U7Nmz9bvf/U4ZGRm6/fbbtXv37h4ZHAAAxB5rsBtWVVVpx44dqqurU1pamiQpOTlZubm5OnjwoAoLC1VSUqKlS5dKklwulwYPHqwXX3xRX/ziF3tkeAAAEFuCDo/MzEzV1NT4o0OSUlJSJEltbW1av369xo0b51/Xv39/2e12tba2Rm5aAAAQ04IOD6fTKafTGbBs8+bNGjRokMaMGaO4uMB3bd5++2199tlnmjRp0gX36fF45PF4/Lfdbrckyev1yuv1BjsagC40nDyjM572aI/ht//jpoCfvUmSPV5pKUnRHgOIWaG8ZgcdHv/u0KFDKisr06pVqzpFhyQVFhbqC1/4gm688cYL7qO4uFhFRUWdlldUVCgxMTHc0YDPvU/OSqvfC/vw7lHL1u+N9ghdWnFtmwZfFu0pgNjU0tIS9LYWn8/nC/UBOjo6NH36dB07dky1tbWdImHt2rXKzc3V5s2bdcMNN1xwP12d8UhNTVVjY6McDkeoYwH4lw+Ou3XLY9v1C9d4jR7UO/4nf+afHm154x3d8OVMJfWzR3scvwMnzmjp87XacFeWrh7O8w4QDrfbrYEDB6qpqemSr99h/ZeopKRE27ZtU3V1dafo2L9/v+6++27dddddF40OSbLb7bLbOz8B2Ww22Wy2cEYDIMlqPXdo/59hTv3nFc5LbG2G1+tVY710/ahBver4Pv+7slqtvWouIJaEcuyEfAGxrVu3qqCgQKtXr9bEiRMD1p05c0a33nqrMjIy9Mtf/jLUXQMAgD4upDMee/bskcvl0pw5c7Rs2bKAde3t7crJydFHH32knTt3ql+/fhEdFAAAxL6gw8Pr9crlcslisSgvL0+7du3yr0tPT9fjjz+uTZs26f7779epU6d06tQpSeeu9TF27NjITw4AAGJO0OFRV1envXvPfRp9xowZAevWrVun5557TpK0cuVKrVy50r9u6tSpeu211yIwKgAAiHVBh8eECRN0sT+AWbRoUSTmAQAAfRjfTgsAAIwhPAAAgDGEBwAAMIbwAAAAxhAeAADAGMIDAAAYQ3gAAABjCA8AAGAM4QEAAIwhPAAAgDGEBwAAMIbwAAAAxhAeAADAGMIDAAAYQ3gAAABjCA8AAGAM4QEAAIwhPAAAgDGEBwAAMIbwAAAAxhAeAADAGMIDAAAYQ3gAAABjCA8AAGAM4QEAAIwhPAAAgDGEBwAAMIbwAAAAxhAeAADAGMIDAAAYQ3gAAABjCA8AAGAM4QEAAIwhPAAAgDGEBwAAMCak8Dh58qTmz58vh8OhhIQETZs2TQ0NDQHbnD59WuPGjdNrr70WwTEBAEBfYA1l45ycHNXX16uwsFBWq1U/+9nP5HK5tHPnTkmS1+vVHXfcob179/bIsAAAILYFHR5VVVXasWOH6urqlJaWJklKTk5Wbm6uDh8+rGHDhummm25SU1NTT80KAABiXNBvtWRmZqqmpsYfHZKUkpIiSWptbdXHH3+slJQUbd26NeJDAgCAviHoMx5Op1NOpzNg2ebNmzVo0CCNGTNGkvTHP/4xpAf3eDzyeDz+2263W9K5t2y8Xm9I+wLwP9ra2vw/e8uxdH6O3jLPeb3xdwXEmlCOnZA+4/G/HTp0SGVlZVq1apXi4sL745ji4mIVFRV1Wl5RUaHExMRwRwM+946eliSr3nzzTR3pH+1pAlVWVkZ7hAC9+XcFxIqWlpagt7X4fD5fqA/Q0dGh6dOn69ixY6qtre0UCRaLRa+++qqmTZt20f10dcYjNTVVjY2NcjgcoY4F4F8+OO7WLY9t14a7snT18N5xLHm9XlVWVmrWrFmy2WzRHsevN/6ugFjjdrs1cOBANTU1XfL1O6wzHiUlJdq2
bZuqq6u7dWbCbrfLbrd3Wm6z2XrVExMQa6xWq/9nbzuWetvx3Zt/V0CsCOXYCfk9kq1bt6qgoECrV6/WxIkTQ707AAD4HAspPPbs2SOXy6U5c+Zo2bJlPTUTAADoo4J+q8Xr9crlcslisSgvL0+7du3yr0tPT/f/aS0AAMCFBB0edXV1/iuSzpgxI2DdunXrtGjRoogOBgAA+p6gw2PChAkK9g9gwvhDGQAA8DnAt9MCAABjCA8AAGAM4QEAAIwhPAAAgDGEBwAAMIbwAAAAxhAeAADAGMIDAAAYQ3gAAABjCA8AAGAM4QEAAIwhPAAAgDGEBwAAMIbwAAAAxhAeAADAGMIDAAAYQ3gAAABjCA8AAGAM4QEAAIwhPAAAgDGEBwAAMIbwAAAAxhAeAADAGMIDAAAYQ3gAAABjCA8AAGAM4QEAAIwhPAAAgDGEBwAAMIbwAAAAxhAeAADAGMIDAAAYQ3gAAABjCA8AAGAM4QEAAIwJKTxOnjyp+fPny+FwKCEhQdOmTVNDQ4N/fU1NjbKyspScnKzs7GwdPXo00vMCAIAYFlJ45OTkqLq6WoWFhVqzZo1qa2vlcrkkSQ0NDcrOzlZSUpKef/55XXHFFfra174mr9fbI4MDAIDYYw12w6qqKu3YsUN1dXVKS0uTJCUnJys3N1eHDx/Wz3/+cyUkJGjjxo1KSkrSzJkzNWbMGG3YsEHz5s3rqfkBAEAMCfqMR2ZmpmpqavzRIUkpKSmSpNbWVlVVVenmm29WUlKSJCk+Pl433XSTqqqqIjsxAACIWUGf8XA6nXI6nQHLNm/erEGDBmnMmDE6fvy4rrnmmoD1I0eO1Msvv3zBfXo8Hnk8Hv9tt9stSfJ6vbxFA3RDW1ubLFa3Dny6Rx3WpGiPI+ncTMfbjqv2k1pZrUE/9fS4Q5+ekcXqVltbG887QJhCOXbCPvoPHTqksrIyrVq1SnFxcTp79qwuv/zygG0uu+wynThx4oL7KC4uVlFRUaflFRUVSkxMDHc04HPv6GnJNmCHCnY9EO1ROnm06tFoj9CJbcBX9OabiTrSP9qTALGppaUl6G3DCo+Ojg4tXrxYI0aMUF5eniTJbrcrPj4+YLuEhASdPXv2gvtZvny58vPz/bfdbrdSU1OVnZ0th8MRzmgAJH1w3K3SJ1u05oZvaNSg3nPGY8f2HfpS1pd61xmPE2d09x8bNHneZF09nOcdIBzn37EIRlhHf0lJibZt26bq6mr/mYnBgwfr+PHjAdudOnXK/5mPrtjtdtnt9k7LbTabbDZbOKMBkGS1WuVrc2j05eP0n0Ocl76DAV6vV0etRzV+8PhedXzHtTXJ13ZKVqu1V80FxJJQjp2QLyC2detWFRQUaPXq1Zo4caJ/eVZWlrZt2xaw7a5duzR8+PBQHwIAAPRRIYXHnj175HK5NGfOHC1btixgncvl0ssvv6za2lpJ567rsXHjRs2cOTNy0wIAgJgW9FstXq9XLpdLFotFeXl52rVrl39denq6brnlFl133XWaNm2abrrpJlVUVGjQoEG68847e2RwAAAQe4I+41FXV6e9e/fq1KlTmjFjhjIzM/3/ysvLZbVaVVlZqdzcXL3//vuaPn263nrrLT4kCgAA/II+4zFhwgT5fL6LbpOYmKiSkhKVlJR0ezAAAND38O20AADAGMIDAAAYQ3gAAABjCA8AAGAM4QEAAIwhPAAAgDGEBwAAMIbwAAAAxhAeAADAGMIDAAAYQ3gAAABjCA8AAGAM4QEAAIwhPAAAgDGEBwAAMIbwAAAAxhAeAADAGMIDAAAYQ3gAAABjCA8AAGAM4QEAAIwhPAAAgDGEBwAAMIbwAAAAxhAeAADAGMIDAAAYQ3gAAABjCA8AAGAM4QEAAIwhPAAAgDGEBwAAMIbwAAAAxhAeAADAGMIDAAAYQ3gAAABjwg6PyspKZWRkBCzr6OjQgw8+qNGjR6tfv36aPHmytm/f3u0hAQBA3xBWeNTX12vBggVq
b28PWH7vvfeqpKREK1asUEVFhYYOHaoZM2aotrY2IsMCAIDYFnJ41NTUaNKkSUpPTw9Y/umnn+qRRx5RcXGxFi9erClTpuhPf/qTBg8erIcffjhiAwMAgNgVcnhUV1ertLRUS5YsCVheW1ur1tZWZWdn+5fFx8drzJgxamho6PagAAAg9llDvUN+fr7i4uJUVlYWsDw+Pl6S1NjYqFGjRkmS2tvbVV9fr1mzZnW5L4/HI4/H47/tdrslSV6vV16vN9TRAPxL89lzx9XuD0+pra0tytOcc+afHu08IQ08dEJJ/ezRHsfvwIkzkqS2tjaed4AwhXLshBwecXFdnyS59tpr5XA49OMf/1jr169XcnKyVq5cqWPHjumWW27p8j7FxcUqKirqtLyiokKJiYmhjgbgX97+h0VSvFb8ZU+0R/k3Vv2/A3+L9hBdeuftN3XksmhPAcSmlpaWoLe1+Hw+XzgPUlZWplWrVgW8jfLcc89p4cKFiouLk9Vq1ZkzZzRq1Cjt27dPVmvnxunqjEdqaqoaGxvlcDjCGQuApFNnWlW19xONGpSky2zx0R5HkrT/4yYtW79Xa+Z+QVcNdUZ7nABJ9nilpSRFewwgZrndbg0cOFBNTU2XfP0O+YzHxeTk5Gjq1KmqrKzUjh079Mgjj6igoKDL6JAku90uu73zKVebzSabzRbJ0YDPlSEDbLrt/6ZfesMouGqoU9demRLtMQBEUCiv2RG/gNiQIUO0cOFCHThwQOPGjdPtt98e6YcAAAAxKqJnPM579913tWXLFpWXl/s/dAoAANAjl0xfvny5pk+frjlz5vTE7gEAQIyK+BmP119/XVVVVdq5c2ekdw0AAGJc2Gc8Fi1a1OWFwaZOnar29nZNmDChO3MBAIA+iG+nBQAAxhAeAADAGMIDAAAYQ3gAAABjCA8AAGAM4QEAAIwhPAAAgDGEBwAAMIbwAAAAxhAeAADAGMIDAAAYQ3gAAABjCA8AAGAM4QEAAIwhPAAAgDGEBwAAMIbwAAAAxhAeAADAGMIDAAAYQ3gAAABjCA8AAGAM4QEAAIwhPAAAgDGEBwAAMIbwAAAAxhAeAADAGMIDAAAYQ3gAAABjCA8AAGAM4QEAAIwhPAAAgDGEBwAAMIbwAAAAxhAeAADAGMIDAAAYE3Z4VFZWKiMjo9Pyhx9+WBkZGUpMTNQ111yjv/zlL90aEAAA9B1hhUd9fb0WLFig9vb2gOVbtmzRypUrdd9992nz5s3KysrSrbfeqt27d0dkWAAAENtCDo+amhpNmjRJ6enpndY988wzWrhwoe644w5NnTpVv/3tbzV06FCVl5dHZFgAABDbQg6P6upqlZaWasmSJZ3WNTY2qqOjw3/b5/Opra1N/fr1696UAACgT7CGeof8/HzFxcWprKys07pZs2apqKhICxYs0LXXXqtf//rX+uyzzzRv3rwu9+XxeOTxePy33W63JMnr9crr9YY6GoBerK2tzf+T4xvoW0I5pkMOj7i4C58kycvL0yuvvKIpU6b4t12/fr2uvPLKLrcvLi5WUVFRp+UVFRVKTEwMdTQAvdjR05Jk1fbt2/VRXbSnARBJLS0tQW8bcnhczGOPPaZdu3bpF7/4hVJTU/XMM89o/vz5eumllzR9+vRO2y9fvlz5+fn+2263W6mpqcrOzpbD4YjkaACibPeHp6TancrKytIXR/5HtMcBEEHn37EIRsTCo62tTYWFhVq7dq3mzp0rSZo3b56+8pWvaMWKFXrrrbc63cdut8tut3dabrPZZLPZIjUagF7AarX6f3J8A31LKMd0xC4g1tjYqE8//TTg2h4Wi0Xjx4/XwYMHI/UwAAAghkUsPFJSUmSxWLRz507/stbWVlVWVuqKK66I1MMAAIAYFrG3Wmw2m2bPnq0f/ehH2rFjh5xOp1555RXt3btXTz31VKQeBgAAxLCIfrj02WefVWFhoV566SUdO3ZMQ4YM0f3336/FixdH8mEAAECMsvh8
Pl+0hzjP7XbL6XSqqamJv2oB+pj3jpzULY9t14a7snTtlSnRHgdABIXy+s230wIAAGMIDwAAYAzhAQAAjCE8AACAMYQHAAAwhvAAAADGEB4AAMAYwgMAABhDeAAAAGMIDwAAYAzhAQAAjCE8AACAMYQHAAAwhvAAAADGEB4AAMAYwgMAABhDeAAAAGMIDwAAYAzhAQAAjCE8AACAMYQHAAAwhvAAAADGEB4AAMAYwgMAABhDeAAAAGMIDwAAYAzhAQAAjCE8AACAMYQHAAAwhvAAAADGEB4AAMAYwgMAABhDeAAAAGMIDwAAYAzhAQAAjAk7PCorK5WRkRGwLC0tTRaLpct/DQ0N3Z0VAADEOGs4d6qvr9eCBQuUlJQUsLy8vFwejydg2e9//3s999xzGjp0aPhTAgCAPiHk8KipqdHs2bOVkZGhTz75JGDd+PHjA263trZq/fr1Wrlypfr169e9SQEAQMwL+a2W6upqlZaWasmSJZfcdu3atbJYLMrNzQ1rOAAA0LeEfMYjPz9fcXFxKisru+h2Pp9PpaWlysvLU0JCQpfbeDyegLdm3G63JMnr9crr9YY6GoBerK2tzf+T4xvoW0I5pkMOj7i44E6SbNq0SR9//LEWL158wW2Ki4tVVFTUaXlFRYUSExNDHQ1AL3b0tCRZtX37dn1UF+1pAERSS0tL0NuG9eHSYDz66KOaP3++BgwYcMFtli9frvz8fP9tt9ut1NRUZWdny+Fw9NRoAKJg94enpNqdysrK0hdH/ke0xwEQQeffsQhGj4RHY2OjKioqtGXLlotuZ7fbZbfbOy232Wyy2Ww9MRqAKLFarf6fHN9A3xLKMd0jFxB74YUX5HQ6NXXq1J7YPQAAiFE9Eh4vvviiZs+e7f8fDgAAgNQD4dHa2qo33nhDkydPjvSuAQBAjIt4eLz99ts6e/asJk6cGOldAwCAGBd2eCxatKjL71+ZOnWqfD5fp6uYAgAA8O20AADAGMIDAAAYQ3gAAABjCA8AAGAM4QEAAIwhPAAAgDGEBwAAMIbwAAAAxhAeAADAGMIDAAAYQ3gAAABjCA8AAGAM4QEAAIwhPAAAgDGEBwAAMIbwAAAAxhAeAADAGMIDAAAYQ3gAAABjCA8AAGAM4QEAAIwhPAAAgDGEBwAAMIbwAAAAxhAeAADAGMIDAAAYQ3gAAABjCA8AAGAM4QEAAIwhPAAAgDGEBwAAMIbwAAAAxhAeAADAGMIDAAAYQ3gAAABjwg6PyspKZWRkXHD9448/rsTERH344YfhPgQAAOhjwgqP+vp6LViwQO3t7V2u/8c//qHly5dr+fLlGjlyZLcGBAAAfUfI4VFTU6NJkyYpPT39gtvcfffduvzyy3Xvvfd2azgAANC3WEO9Q3V1tUpLSyVJq1at6rS+srJSf/jDH7Rhwwb169ev2wMCAIC+I+TwyM/PV1xcnMrKyjqt83q9+t73vqcBAwaovLxcGzZs0De/+U199atf7XJfHo9HHo/Hf9vtdvv34/V6Qx0NQA9oaWnRvn37ur2f/f/dJM/HB1T3XoJa/+GMwGTS2LFjlZiYGJF9AQhfKK/ZIYdHXNyF35154okn9Pe//139+/fXhx9+qAMHDqisrEylpaXKz8/vtH1xcbGKioo6La+oqODJBOglDh48qHvuuSdi+7v9vyK2K5WWll70Q+4AzGhpaQl6W4vP5/OF8yBlZWVatWqVGhoa/MvGjx+vY8eO6W9/+5vS0tLU0dGhb3zjGyovL9eJEyeUnJwcsI+uznikpqaqsbFRDocjnLEARFikznicPuvRK2+8o69+OVP9L7NHYDLOeAC9hdvt1sCBA9XU1HTJ1++Qz3hczP79+/Wtb31LaWlpks6dHcnNzdXzzz+vPXv26Etf+lLA9na7XXZ75ycgm80mm80WydEAhMnpdOr666/v9n68Xq+aPzulL0/M4vgG+phQjumIXkAsKSlJo0aNClh2/gOmXQUGAAD4fIloeGRm
Zmr37t0By15//XUlJydr3LhxkXwoAAAQgyL6Vsu9996r2bNnKyMjQ7NmzVJNTY0efPBBLV26VAkJCZF8KAAAEIMiGh4zZ87Uxo0bdf/99+uhhx5S//799YMf/ED33XdfJB8GAADEqLD/qqUnuN1uOZ3OoD4VCyC2eL1ebdq0STfeeCMfLgX6mFBev/l2WgAAYAzhAQAAjCE8AACAMYQHAAAwhvAAAADGEB4AAMAYwgMAABhDeAAAAGMieuXS7jp/LTO32x3lSQBEmtfrVUtLi9xuNxcQA/qY86/bwVyTtFeFR3NzsyQpNTU1ypMAAIBQNTc3y+l0XnSbXnXJ9I6ODh0/flzJycmyWCzRHgdABLndbqWmpuro0aN8JQLQx/h8PjU3N2v48OGKi7v4pzh6VXgA6Lv4LiYAEh8uBQAABhEeAADAGMIDgBF2u1333Xef7HZ7tEcBEEV8xgMAABjDGQ8AAGAM4QEAAIwhPAAAgDGEBwAjGhoaZLFY1NDQcMlty8rKlJaW1uMzATCP8AAAAMYQHgAAwBjCAwAAGEN4AOi2trY2/eQnP9HQoUOVlJSkuXPn6ujRo0Hfv6amRtddd5369eunSZMm6ciRIz04LYBoIjwAdNt3vvMdPfroo/rpT3+qP//5zzpy5IimTp0qt9t9yfs2Nzdrzpw5Sk5O1saNG5Wdna3Vq1cbmBpANFijPQCA2Hb48GGtW7dOTzzxhO68805J0jXXXKPRo0dr3bp1ysvLu+j9n3nmGZ06dUrPPvushg0bpuzsbO3evVvvvvuuifEBGMYZDwDdsnPnTvl8Ps2cOdO/bMSIEbrqqqv0zjvvXPL++/fv18iRIzVs2DD/silTpvTIrACij/AA0C3nv+7JYrEELI+Li1MwXwXV0dGh+Pj4gGX/fhtA30F4AOiW6667ThaLRX/961/9yz766CPV19crMzPzkvcfPXq0jhw5ohMnTviXbdu2rUdmBRB9hAeAbhk1apQWL16spUuX6sknn9SmTZt08803a/jw4fr2t799yfvfdttt6t+/v2677TZVVVXpgQce0AsvvGBgcgDRQHgA6LYnnnhC3/3ud7VixQq5XC6NGDFCr7/+uhwOxyXve/nll6uyslLNzc36+te/rvXr12vp0qUGpgYQDRZfMG/CAgAARABnPAAAgDGEBwAAMIbwAAAAxhAeAADAGMIDAAAYQ3gAAABjCA8AAGAM4QEAAIwhPAAAgDGEBwAAMIbwAAAAxvx/qQ1hKjtgGx8AAAAASUVORK5CYII=",
      "text/plain": [
       "<Figure size 640x480 with 1 Axes>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "# Sample data for 18 users: a name plus numeric 'old' and 'weight' columns\n",
    "data = {\n",
    "    'name': [f'user{i}' for i in range(1, 19)],\n",
    "    'old': [221,21,20,16,13,22,18,19,20,20,19,23,22,20,19,23,22,21],\n",
    "    'weight':[121,122,132,135,128,124,129,133,362,135,128,124,129,135,128,124,129,73],\n",
    "}\n",
    "columns1 = ['name', 'old', 'weight']\n",
    "# Row labels 'id1' .. 'id18'\n",
    "index1 = [f'id{i}' for i in range(1, 19)]\n",
    "df1 = pd.DataFrame(data, columns=columns1, index=index1)\n",
    "\n",
    "import matplotlib.pyplot as plt\n",
    "import matplotlib\n",
    "\n",
    "%matplotlib inline\n",
    "# Font settings applied through the matplotlib 'font' rc group\n",
    "font = dict(family='SimHei', weight='bold', size=12)\n",
    "matplotlib.rc(\"font\", **font)\n",
    "# Draw minus signs with the ASCII hyphen rather than the Unicode minus glyph\n",
    "matplotlib.rcParams['axes.unicode_minus'] = False\n",
    "\n",
    "# Box plot of the 'old' column\n",
    "df1.boxplot(column='old')\n",
    "plt.show()\n",
    "\n",
    "\n",
    "\n",
    "# 返回异常值\n",
    "def box_outliers(ser):\n",
    "    \"\"\"Return the values of ``ser`` that fall outside the box-plot fences.\n",
    "\n",
    "    Q1 and Q3 are the medians of the lower and upper halves of the sorted,\n",
    "    non-missing values. Each half holds ``n // 2`` values, so for an odd\n",
    "    ``n`` the overall median belongs to neither half. The fences are\n",
    "    ``Q1 - 1.5 * IQR`` and ``Q3 + 1.5 * IQR``, rounded to one decimal place,\n",
    "    and both are printed as a side effect.\n",
    "\n",
    "    :param ser: numeric pandas Series to check\n",
    "    :return: Series with the outliers, keeping their original row labels\n",
    "    \"\"\"\n",
    "    # Drop missing values before sorting so that the parity check and the\n",
    "    # half-way split are based on the same number of elements\n",
    "    sorted_vals = ser.dropna().sort_values()\n",
    "    n = len(sorted_vals)\n",
    "    half = n // 2\n",
    "\n",
    "    # First n//2 values -> Q1, last n//2 values -> Q3 (median excluded if n is odd)\n",
    "    Q1 = sorted_vals.iloc[:half].median()\n",
    "    Q3 = sorted_vals.iloc[n - half:].median()\n",
    "    IQR = round(Q3-Q1,1)\n",
    "    ma = round(Q3+1.5*IQR,1)\n",
    "    mi = round(Q1-1.5*IQR,1)\n",
    "\n",
    "    # True where a value is above the upper fence or below the lower fence\n",
    "    rule = (ma < ser) | (mi > ser)\n",
    "    print(\"下限为{},上限为{}\".format(mi,ma))\n",
    "    # Select with the mask directly; row labels of the outliers are preserved\n",
    "    return ser[rule]\n",
    "# Outliers of the unmodified 'old' column\n",
    "print(box_outliers(df1['old']))\n",
    "\n",
    "\n",
    "\n",
    "# Drop row 'id1' and check the remaining rows\n",
    "df1_drop = df1.drop(['id1'])\n",
    "print(box_outliers(df1_drop['old']))\n",
    "\n",
    "\n",
    "\n",
    "# Replacement-based handling: clip to the bounds printed by box_outliers\n",
    "# for this data (lower 14.5, upper 26.5)\n",
    "topnum1 = 26.5\n",
    "bottomnum1 = 14.5\n",
    "\n",
    "# Values above the upper bound become the largest value below that bound\n",
    "replace_value1 = df1['old'][df1['old'] < topnum1].max()\n",
    "df1.loc[df1['old'] > topnum1,'old'] = replace_value1\n",
    "\n",
    "# Values below the lower bound become the smallest value above that bound\n",
    "replace_value2 = df1['old'][df1['old'] > bottomnum1].min()\n",
    "df1.loc[df1['old'] < bottomnum1,'old'] = replace_value2\n",
    "\n",
    "print(df1)\n",
    "\n",
    "\n",
    "# Check for outliers again after clipping\n",
    "print(box_outliers(df1['old']))\n",
    "df1.boxplot(column='old')\n",
    "plt.show()\n",
    "\n",
    "\n",
    "# Direct replacement of a known outlier value.\n",
    "# df1 was already clipped above, so rebuild the raw frame from `data` to show\n",
    "# the value before and after the replacement.\n",
    "df1_raw = pd.DataFrame(data,columns=columns1,index=index1)\n",
    "print(df1_raw.loc['id1','old'])\n",
    "print('.........')\n",
    "df1_raw['old'] = df1_raw['old'].replace({221:23})\n",
    "# Look up the row by its label to see the replaced value\n",
    "df1_raw.loc['id1']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "77ae9992-80d1-4bcd-abdc-251d76a58c60",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
